import csv
import sqlite3
import re
import os
from pathlib import Path

# --- BEGIN: PATH DEFINITIONS ---
# Every path below is derived from the directory that contains this script.
SCRIPT_DIR = Path(__file__).parent.resolve()

# 1. Output directory 'kods' and the SQLite database file stored inside it.
OUTPUT_DIR = SCRIPT_DIR.joinpath('kods')
DB_FILE = OUTPUT_DIR.joinpath('companies.sqlite')

# 2. The source CSV is looked up in the 'csv' directory next to the script.
CSV_FILE = SCRIPT_DIR.joinpath('csv', 'register.csv')
# --- END: PATH DEFINITIONS ---


# Legal-form designations (abbreviations and full Latvian phrases) that are
# dropped from the search text because they do not help identify a company.
_LEGAL_FORM_TERMS = (
    "ik", "sia", "as", "ps", "ks", "zs", "nodibinājums", "biedrība",
    "individuālais komersants", "sabiedrība ar ierobežotu atbildību",
    "akciju sabiedrība", "pilnsabiedrība", "komandītsabiedrība",
    "zemnieku saimniecība", "kooperatīvā sabiedrība", "filiāle",
    "ārvalsts komersanta filiāle", "individuālais uzņēmums",
    "pašvaldības uzņēmums", "pārstāvniecība", "fonds",
)

# Every term as a list of words, longest first, so that a long phrase such as
# "ārvalsts komersanta filiāle" is matched before the bare "filiāle".
_LEGAL_FORM_WORD_SEQUENCES = sorted(
    (term.split() for term in _LEGAL_FORM_TERMS), key=len, reverse=True
)

# Quotes (straight, „ “ ” and « »), brackets and separators that are replaced
# by a space before the name is split into words.
_SEARCH_PUNCTUATION_RE = re.compile(
    r'[\u201e\u201c\u201d\u00ab\u00bb"\'`()\[\]{}:;,./\\|\-]'
)


def _drop_legal_forms(words):
    """Return *words* without any word sequence listed in the legal forms."""
    kept = []
    index = 0
    while index < len(words):
        for sequence in _LEGAL_FORM_WORD_SEQUENCES:
            if words[index:index + len(sequence)] == sequence:
                index += len(sequence)
                break
        else:
            # No legal form starts at this position: keep the word.
            kept.append(words[index])
            index += 1
    return kept


def normalize_for_search(name_str, regcode_for_debug=None):
    """Build the normalized search text for a company name.

    The name is lower-cased, punctuation and quotes are replaced by spaces,
    and legal-form designations (both abbreviations such as "sia" and full
    multi-word phrases such as "akciju sabiedrība") are removed.

    Args:
        name_str: Raw company name; any falsy value yields an empty string.
        regcode_for_debug: Accepted for call compatibility; not used.

    Returns:
        The cleaned name. When the cleaned name contains spaces, its
        space-free variant is included as well and the two strings are
        joined by a single space in sorted order. Returns "" when nothing
        remains after cleaning.
    """
    if not name_str:
        return ""

    text = str(name_str).lower()
    # A zero-width space is not whitespace for str.split(), so it has to be
    # turned into a real space explicitly; same treatment for the NBSP.
    text = text.replace('\xa0', ' ').replace('\u200b', ' ')
    text = _SEARCH_PUNCTUATION_RE.sub(' ', text)

    kept_words = _drop_legal_forms(text.split())
    spaced = " ".join(kept_words)
    if not spaced:
        return ""

    # The glued variant lets "latvijaspasts" match "Latvijas Pasts".
    glued = "".join(kept_words)
    if glued == spaced:
        return spaced
    return " ".join(sorted((spaced, glued)))


def _create_companies_schema(cursor):
    """Create the ``companies`` table, its FTS5 index and the sync triggers."""
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS companies (
            regcode TEXT PRIMARY KEY,
            original_name TEXT NOT NULL,
            search_helper TEXT NOT NULL
        )
    ''')
    # NOTE(review): the FTS column is named 'search_helper_content' while the
    # content table column is 'search_helper'. MATCH queries that return only
    # rowid work, but reading the column value through companies_fts
    # presumably fails -- confirm against the consumers before renaming.
    cursor.execute('''
        CREATE VIRTUAL TABLE companies_fts USING fts5(
            search_helper_content,
            content='companies',
            content_rowid='rowid',
            tokenize = "unicode61 remove_diacritics 2"
        )
    ''')
    cursor.execute('''
        CREATE TRIGGER companies_ai AFTER INSERT ON companies BEGIN
            INSERT INTO companies_fts (rowid, search_helper_content)
            VALUES (new.rowid, new.search_helper);
        END;
    ''')
    # An external-content FTS5 index must be told the OLD values through the
    # special 'delete' command; a plain DELETE/UPDATE cannot look them up
    # once the content row has already been changed.
    cursor.execute('''
        CREATE TRIGGER companies_ad AFTER DELETE ON companies BEGIN
            INSERT INTO companies_fts (companies_fts, rowid, search_helper_content)
            VALUES ('delete', old.rowid, old.search_helper);
        END;
    ''')
    cursor.execute('''
        CREATE TRIGGER companies_au AFTER UPDATE ON companies BEGIN
            INSERT INTO companies_fts (companies_fts, rowid, search_helper_content)
            VALUES ('delete', old.rowid, old.search_helper);
            INSERT INTO companies_fts (rowid, search_helper_content)
            VALUES (new.rowid, new.search_helper);
        END;
    ''')


def _compose_display_name(type_val, name_in_quotes, name_before_quotes, full_name):
    """Return the name stored in ``original_name`` for one register row."""
    if type_val == 'ZEM':
        # 'ZEM' rows list the type, the quoted name and the text before the
        # quotes, skipping whichever parts are empty.
        parts = (type_val, name_in_quotes, name_before_quotes)
        return ", ".join(part for part in parts if part)
    if name_in_quotes:
        return f"{type_val}, {name_in_quotes}" if type_val else name_in_quotes
    return full_name


def _import_register_rows(cursor, csvfile):
    """Insert the usable rows of *csvfile* and return (rows_read, rows_inserted)."""
    column_names = 'regcode;sepa;name;name_before_quotes;name_in_quotes;name_after_quotes;without_quotes;regtype;regtype_text;type;type_text;registered;terminated;closed;address;index;addressid;region;city;atvk;reregistration_term'.split(';')
    # restval='' keeps fields missing from a short row as empty strings; the
    # default None would make .strip() raise and abort the whole import.
    reader = csv.DictReader(csvfile, delimiter=';', fieldnames=column_names, restval='')

    count = 0
    inserted_count = 0

    for row in reader:
        count += 1
        regcode = row['regcode'].strip()

        # Only 11-digit codes are accepted; this also drops a header row.
        if not (regcode.isdigit() and len(regcode) == 11):
            continue

        # Plain string comparison -- assumes ISO 'YYYY-MM-DD' dates in the
        # 'terminated' column (TODO confirm against the data source).
        terminated_date_str = row['terminated'].strip()
        if terminated_date_str and terminated_date_str < '2020-01-01':
            continue

        name_in_quotes_val = row['name_in_quotes'].strip()
        name_before_quotes_val = row['name_before_quotes'].strip()
        original_full_name = row['name'].strip()

        display_name = _compose_display_name(
            row['type'].strip(),
            name_in_quotes_val,
            name_before_quotes_val,
            original_full_name,
        )
        if not display_name:
            continue

        search_base_name = name_in_quotes_val or name_before_quotes_val or original_full_name

        normalized_name_part = normalize_for_search(search_base_name, regcode)
        # Fall back to the display name when the preferred source is reduced
        # to nothing by normalization.
        if not normalized_name_part and display_name != search_base_name:
            normalized_name_part = normalize_for_search(display_name, regcode)

        final_search_helper = f"{regcode} {normalized_name_part}"

        try:
            cursor.execute(
                'INSERT INTO companies (regcode, original_name, search_helper) VALUES (?, ?, ?)',
                (regcode, display_name, final_search_helper)
            )
            inserted_count += 1
        except sqlite3.IntegrityError:
            # Deliberate: a repeated regcode violates the primary key and
            # only the first occurrence is kept.
            pass
        except sqlite3.Error as e:
            print(f"Neizdevās ievietot rindu ar regcode '{regcode}': {e}")

    return count, inserted_count


def create_database():
    """Rebuild the company search database from the register CSV.

    Steps: verify that ``CSV_FILE`` exists, make sure ``OUTPUT_DIR`` exists,
    delete any previous ``DB_FILE``, create the schema and import every row
    that has an 11-digit regcode, was not terminated before 2020-01-01 and
    has a non-empty display name. Progress and errors are printed.

    The source file is checked before anything is deleted, so a missing CSV
    leaves an existing database untouched. Errors raised while reading the
    CSV are printed and the import is not committed; errors raised while
    creating the schema propagate to the caller. The connection is closed in
    every case.
    """
    if not CSV_FILE.is_file():
        print(f"Kļūda: Avota fails '{CSV_FILE}' nav atrasts.")
        print(f"Pārbaudiet, vai mape '{CSV_FILE.parent}' eksistē un tajā ir fails 'register.csv'.")
        return

    # Make sure the output directory 'kods' exists.
    if not os.path.exists(OUTPUT_DIR):
        try:
            os.makedirs(OUTPUT_DIR, exist_ok=True)
            print(f"Direktorija '{OUTPUT_DIR.name}' izveidota.")
        except OSError as e:
            print(f"Kļūda veidojot direktoriju '{OUTPUT_DIR}': {e}")
            return

    # Remove the previous database, if any, so the build starts from scratch.
    if os.path.exists(DB_FILE):
        os.remove(DB_FILE)
        print(f"Vecā datubāze '{DB_FILE.name}' dzēsta.")

    conn = sqlite3.connect(DB_FILE)
    try:
        cursor = conn.cursor()
        _create_companies_schema(cursor)
        print(f"Datubāze '{DB_FILE.name}' un tabulas izveidotas mapē '{OUTPUT_DIR.name}'.")

        try:
            # newline='' is what the csv module expects; 'utf-8-sig' strips
            # a leading BOM and otherwise decodes exactly like 'utf-8'.
            with open(CSV_FILE, mode='r', encoding='utf-8-sig', newline='') as csvfile:
                count, inserted_count = _import_register_rows(cursor, csvfile)

            conn.commit()
            print(f"No '{CSV_FILE.name}' nolasītas rindas: {count}")
            print(f"Datubāzē ievietoti unikāli ieraksti: {inserted_count}")
        except Exception as e:
            # Top-level boundary of the script: report the failure; the
            # uncommitted inserts are discarded when the connection closes.
            print(f"Nezināma kļūda, apstrādājot CSV: {e}")
    finally:
        conn.close()

# Build the database only when this file is run as a script, not on import.
if __name__ == '__main__':
    create_database()